*********************************************
* Title: rwanda_student_baseline_modify2.do
* Author: Todd Pugatch
* Last update: 6 Sep 2019
* Description: modifies and expands data on students from Rwanda entrepreneurship & teacher training baseline
* Inputs: 	Student_Survey_cleaned_NOPII[v12].dta
*			Teacher_Survey_cleaned_modifyv2.dta 
*			Student_Survey_cleaned_linked_NOPIIs.dta
*			Section14_Q1400-1401_Cleaned_V12.dta
*			rwanda_student_baseline_modify1_aux1.do
* Outputs: Student_Survey_cleaned_modifyv2.[dta/txt]
* Notes: produced in preparation for JDE Registered Report Stage 2 and endline report
*	--differs from rwanda_student_baseline_modify1.do in: 
*		--referencing baseline survey in variable name (bht for baseline head teacher)
*		--referencing survey question number in variable label 
*		--adding variable notes
*		--following JDE Stage 1 Registered Report conventions on missing variables
*	--see "Student Survey - ENG_Final.pdf" and "Student Survey - KRW_Final.pdf" for questionnaires 
*		--in JDE Registered Report folder as "baselinesurveystudent.pdf"
*	--Many missing variables recoded as zero here, because in these cases a missing
*		means "not applicable" (for instance, primary school enrollment if there is
*		no attached primary school, NGO contributions to the school if there are none,
*		etc.). This goes against JDE RR Stage 1 statement of using control group
*		mean when items are missing, but is more sensible.
****************************************************************************
/*From here on statements end at a semicolon, so commands may span lines.
	Star comments must therefore also be closed with a semicolon.*/
#delimit;
/*store the current time (global S_TIME) in local start; its later use is not visible in this section*/
local start=`"$S_TIME"';
/*reset the session: data, matrices, Mata, graphs, user programs, and any open log*/
clear;
clear matrix;
clear mata;
graph drop _all;
program drop _all;
cap log close;
set more off;

* define file paths;
/*scalar mode is read by the cd_* programs defined below to pick a machine-specific path*/
scalar define mode=1; /*1=Todd laptop, 2=Todd desktop, 3=Todd desktop 2, 4=Moussa*/

/*run this program before opening log file or saving output*/
program cd_output;
	/*Change the working directory to the logs\jde_rr folder.
		The machine selector is read with scalar(mode) so that a variable in the
		data whose name is, or abbreviates to, mode cannot take precedence over
		the scalar. Only modes 1-3 have a path; any other value leaves the
		working directory unchanged.*/
	if (inlist(scalar(mode),1,2,3)) qui cd "C:\Users\pugatcht\Box\IPA_RWA_Project_Educate\10_Analysis&Results\01 Data analysis\logs\jde_rr";
end;

/*run this program before opening cleaned student data*/
program cd_indata;
	/*Change the working directory to the cleaned baseline student data folder.
		The machine selector is read with scalar(mode) so that a variable in the
		data whose name is, or abbreviates to, mode cannot take precedence over
		the scalar. Only modes 1-3 have a path; any other value leaves the
		working directory unchanged.*/
	if (inlist(scalar(mode),1,2,3)) qui cd "C:\Users\pugatcht\Box\IPA_RWA_Project_Educate\07_Questionnaires&Data\01 Baseline_Quantitative\03_DataManagement\04 Data\Baseline Data\02_Cleaned_Data\Student";
end;

/*run this program before opening baseline student data with linked teacherid*/
program cd_linkdata;
	/*Change the working directory to the folder holding the linked (NOPII)
		baseline data. The machine selector is read with scalar(mode) so that a
		variable in the data whose name is, or abbreviates to, mode cannot take
		precedence over the scalar. Only modes 1-3 have a path; any other value
		leaves the working directory unchanged.*/
	if (inlist(scalar(mode),1,2,3)) qui cd "C:\Users\pugatcht\Box\IPA_RWA_Project_Educate\07_Questionnaires&Data\01 Baseline_Quantitative\03_DataManagement\04 Data\Baseline Data\02_Cleaned_Data\Linked\Data_NOPIIs";
end;

/*run this program before opening cleaned & modified teacher data or saving modified data*/
program cd_outdata;
	/*Change the working directory to the usedata folder (modified data and
		temporary files). The machine selector is read with scalar(mode) so that
		a variable in the data whose name is, or abbreviates to, mode cannot take
		precedence over the scalar. Only modes 1-3 have a path; any other value
		leaves the working directory unchanged.*/
	if (inlist(scalar(mode),1,2,3)) qui cd "C:\Users\pugatcht\Box\IPA_RWA_Project_Educate\10_Analysis&Results\01 Data analysis\usedata";
end;

/*run this program before running do-file to enter creativity ratings*/
program cd_do;
	/*Change the working directory to the dofiles folder.
		The machine selector is read with scalar(mode) so that a variable in the
		data whose name is, or abbreviates to, mode cannot take precedence over
		the scalar. Only modes 1-3 have a path; any other value leaves the
		working directory unchanged.*/
	if (inlist(scalar(mode),1,2,3)) qui cd "C:\Users\pugatcht\Box\IPA_RWA_Project_Educate\10_Analysis&Results\01 Data analysis\dofiles";
end;

* define exchange rate to convert all variables in FRW into USD;
/*xrate is FRW per USD: FRW amounts are divided by it further down to obtain USD*/
local xrate=763.5759; /*exchange rate on 1 Mar 2016: http://www.exchangerates.org.uk/USD-RWF-exchange-rate-history-full.html*/

* LOAD AND PREPARE DATA;
/*variable naming and label conventions: 
	--names:	use [bl/el] suffix for baseline/endline
				omit if variable is an identifier that could be used to match
					across datasets, like treatment status or school code
	--labels: 	use B/E for baseline/endline 
				use [H/T/S]Q for head teacher/teacher/student questionnaire
	--example: enrollment, as reported by head teacher at baseline in item #123
		name: enroll_bl
		label: "BHQ123: enrollment"*/
/*move to the cleaned student data folder and load the baseline student survey*/
cd_indata;
qui use Student_Survey_cleaned_NOPII, clear;

* SECTION 1: identifying information;
/*correct coding errors, according to "Manual Checking Report.docx" (Jean Bosco email 24 Nov 2016)*/
/*change GS Mubuga to ES Mubuga, GS NDA Rwaza to GS Marie Reine Rwaza*/
/*NOTE(review): value label schoollab is modified for code 164 only; school 139 is renamed in school_str
	but not in schoollab -- confirm whether the label for 139 should change too*/
lab def schoollab 164 "E.S. MUBUGA", modify;
qui replace school_str="E.S. MUBUGA" if school==164;
qui replace school_str="G.S. MARIE REINE RWAZA" if school==139;
/*change district for GS Karongi to Karongi district*/
/*sector and cell codes are reassigned for the same school*/
qui replace district=9 if school_code_106==301135;
qui replace sector=111 if school_code_106==301135;
qui replace cell=164 if school_code_106==301135;
/*update code for GS Mukono (was incorrectly labeled 504102) and GS Zaza A (incorrectly labeled 605118)*/
/*these two corrections are keyed on school rather than school_code_106, since the code itself is what is wrong*/
qui replace school_code_106=405102 if school==95;
qui replace school_code_106=506118 if school==80;
/*update discrepancies between treatment status between sampling spreadsheet and dataset*/
/*note also discrepancies in Educate!'s handling of these schools, as explained in Meghan Mahoney email to Todd 8 Dec 2016*/
/*the values written here imply group_107: 1=treatment, 2=control*/
qui replace group_107=2 if school_code_106==301112; 	/*GS Manji: control. E! status: treatment*/
qui replace group_107=1 if school_code_106==403110; 	/*GS Kabushinge: treatment. E! status: control*/
qui replace group_107=1 if school_code_106==403037; 	/*GS NDA Rwaza: treatment. E! status: control*/
qui replace group_107=1 if school_code_106==403050; 	/*GS Marie Reine Rwaza: treatment*/
qui replace group_107=1 if school_code_106==305012;		/*ES Muhororo: treatment. E! status: treatment*/
qui replace group_107=1 if school_code_106==504114;		/*GS Nyawera: treatment. E! status: treatment*/
/*the block below is disabled: it is wrapped in a comment, so group_107 keeps its value for these four schools*/
/*unassigned schools added as replacements
qui replace group_107=. if school_code_106==305112;		/*GS Muhororo: unassigned. E! status: control*/
qui replace group_107=. if school_code_106==304102;		/*GS Gihira: unassigned.*/
qui replace group_107=. if school_code_106==403121;		/*GS Nyakinama: unassigned.*/
qui replace group_107=. if school_code_106==503118;		/*GS Nyarubuye: unassigned.*/
*/

/*merge with school-level data from teacher survey (e.g., public & strata indicators)*/
/*teachervars lists the variables taken from the teacher file*/
local teachervars "public strata";
cd_outdata;
qui gen long school_code=school_code_106;  /*long format necessary because GS Rebero has school code longer than 7 digits*/
/*park the student data on disk while the teacher file is cut down to the merge key plus teachervars*/
qui save studenttemp, replace;
qui use Teacher_Survey_cleaned_modifyv2, clear;
keep school_code `teachervars';
qui save teachertemp, replace;
qui use studenttemp, clear;
/*many students per school, so the merge is m:1 on school_code; this requires school_code
	to be unique in teachertemp*/
qui merge m:1 school_code using teachertemp, keepusing(`teachervars') force;
/*print the schools that did not match in both files*/
list school if _merge!=3;
/*rows without a student-side school code are removed; presumably these are schools found
	only in the teacher file -- TODO confirm*/
drop if school_code_106==.;
drop _merge;

/*baseline school name: numeric copy of school, displayed through value label schoollab*/
qui gen schoolname_bl=school;
lab val schoolname_bl schoollab;
lab var schoolname_bl "school name, as reported in baseline (school)";

/*create numeric student ID variable:
	Variable "uniqueid" is string variable of form: 0[school_code]st_[studentnumber]. Convert to numeric, in hopes of 
	matching with student id variable in endline.*/
/*strip the st_ separator, then convert the remaining digits to a number*/
qui gen uniqueid2=subinstr(uniqueid,"st_","",.);
qui destring uniqueid2, gen(studentid);
format studentid %12.0g;
drop uniqueid2;

* check for duplicates of studentid;
qui drop if studentid==.|school_code==.;
duplicates report studentid;
duplicates list studentid;
/*tag on the full variable name rather than the abbreviation student, so the check does not
	depend on variable abbreviation being enabled or on student being unambiguous*/
duplicates tag studentid, gen(dupes);
sort studentid school_code;
/*dupes counts the other observations sharing the id, so >0 shows every duplicated id,
	not only ids that occur exactly twice*/
list school_code schoolname_bl studentid if dupes>0;
/*ids made of 405102 followed by 1..15 are moved to the 504102 prefix for rows of school 504102;
	presumably these students carry the wrong school prefix in uniqueid -- TODO confirm*/
forval i=1/15 {;
	qui replace studentid=504102`i' if studentid==405102`i' & school_code==504102;
};
	
/*Merge with baseline teacherid variable. See Jean Aime email to Todd 5 Sept 2019 for explanation.*/
/*park the student data, then build a slim link file holding studentid and the teacher link variables*/
qui save studenttemp, replace;
cd_linkdata;
qui use Student_Survey_cleaned_linked_NOPIIs, clear;
qui destring uniqueid, gen(studentid);
format studentid %12.0g;

/*The star comment below must end in a semicolon: under the semicolon delimiter an
	unterminated star comment runs on to the next semicolon and would swallow the
	forval header, leaving the replace to run once with an empty loop macro.*/
* fix duplicate studentid issue between schools 405102 and 504102;
/*NOTE(review): this file is filtered on school_code_106 below but the loop tests school_code;
	confirm school_code exists here or resolves by abbreviation to school_code_106*/
forval i=1/15 {;
	qui replace studentid=504102`i' if studentid==405102`i' & school_code==504102;
};
qui drop if studentid==.|school_code_106==.;
/*mark the teacher link variables as baseline*/
foreach x in teacherid same_teacher {;
	ren `x' `x'_bl;
};
lab var teacherid_bl "teacherid, baseline";
keep studentid teacherid_bl same_teacher_bl;
cd_outdata;
qui save linktemp, replace;
qui use studenttemp, clear;

* merge with full baseline student data;
/*1:1 on studentid, so studentid must be unique in both files; link-only rows are discarded*/
qui merge 1:1 studentid using linktemp;
qui drop if _merge==2;
drop _merge;
	
/*create district & province IDs to match Census/map codes
	Census codes are listed in survey-code order, so word k is the Census code for survey code k.
	districts: 1 Bugesera, 2 Gatsibo, 3 Kayonza, 4 Ngoma, 5 Gakenke, 6 Gicumbi, 7 Musanze,
		8 Rulindo, 9 Karongi, 10 Ngororero, 11 Nyabihu
	provinces: 1 Eastern, 2 Northern, 3 Western
	any other survey code is left missing*/
qui gen district_id=.;
local census_district "57 53 54 56 42 45 43 41 31 35 34";
forval d=1/11 {;
	local code: word `d' of `census_district';
	qui replace district_id=`code' if district==`d';
};
lab var district_id "District ID, Census code";

qui gen province_id=.;
local census_province "5 4 3";
forval p=1/3 {;
	local code: word `p' of `census_province';
	qui replace province_id=`code' if province==`p';
};
lab var province_id "Province ID, Census code";

/*treatment status*/
* list any student whose assignment differs from the modal assignment within the school;
bysort school_code_106: egen modal_group=mode(group_107);
list school school_code_106 group_107 if group_107!=modal_group;
drop modal_group;
* NEED TO CHECK WHY IMBALANCE OF TREATMENT/CONTROL;
/*treatment is 1 when group_107 is 1, 0 otherwise, and missing when group_107 is missing*/
qui gen treatment=(group_107==1) if group_107!=.;
lab def treatment 0 "control" 1 "treatment";
lab val treatment treatment;
/*tabulate assignment over students, then over schools (one tagged row per school code)*/
tab group_107, mi;
qui egen school_first=tag(school_code_106);
tab group_107 if school_first==1, mi;
drop school_first;

/*alternate versions of treatment (see "Manual Checking Report.docx" for more details)*/
* alternate version 1: code unassigned replacement schools as missing;
/*305112 GS Muhororo (unassigned. E! status: control), 304102 GS Gihira, 403121 GS Nyakinama,
	503118 GS Nyarubuye*/
qui gen treatment_unassgn=treatment;
qui replace treatment_unassgn=. if inlist(school_code_106,305112,304102,403121,503118);
lab var treatment_unassgn "treatment status (unassigned replacement schools as missing)";
lab val treatment_unassgn treatment;

* alternative version 2: code schools with mismatched status between E! and initial assignment;
/*mismatch based on IPA error, not E! choice:
	301112 GS Manji: control. E! status: treatment
	403110 GS Kabushinge: treatment. E! status: control
	403037 GS NDA Rwaza: treatment. E! status: control
  additional mismatches based on E! error, not IPA assignment choice (see Meghan Mahoney email
  to Todd Pugatch, 10 January 2017):
	503014 ES Gasange: control. E! status: treatment
	503104 GS Gasange: treatment. E! status: control*/
qui gen treatment_educateassgnt=treatment;
qui replace treatment_educateassgnt=1 if inlist(school_code_106,301112,503014);
qui replace treatment_educateassgnt=0 if inlist(school_code_106,403110,403037,503104);
lab var treatment_educateassgnt "treatment status (based on Educate! records, not initial assignment)";
lab val treatment_educateassgnt treatment;

* SECTION 2: student home life;
* not coded:	specific home location (q201)
				transportation to school (q204);
/*each indicator is 0/1; a missing source value yields 0, not missing*/
qui gen rural_bl=(loc_201==1);
lab var rural_bl "BSQ201: student's home in rural area"; 
qui gen homeowner_bl=(house_201==2);
lab var homeowner_bl "BSQ201: student's family owns their home";
qui gen boarding_bl=(board_day_202==1);
lab var boarding_bl "BSQ202: boarding student";
/*far from school: 10 or more km, or 60 or more minutes, ignoring missing distances*/
qui gen home_far_bl=((dist_km_203>=10 & dist_km_203!=.)|(dist_min_203>=60 & dist_min_203!=.));
lab var home_far_bl "BSQ203: home at least 10km or 1hr away";

* SECTION 3: Student SES;
/*age, gender, parents*/
/*age is copied from age_300, with the -99 code left as missing*/
qui gen age_bl=age_300 if age_300!=-99;
lab var age_bl "BSQ300: age (-99 recoded to missing)";
/*disabled top-coding step kept for reference:
	qui replace age_topcode=25 if age_300>25 & age_300!=.*/
qui gen female=(gender_301==2);
/*parental status indicators from parents_302 (3 both, 2 mother only, 1 father only, 4 neither)*/
qui gen bothparents_bl=(parents_302==3);
lab var bothparents_bl "BSQ302: has both parents";
qui gen motheronly_bl=(parents_302==2);
lab var motheronly_bl "BSQ302: has mother only";
qui gen fatheronly_bl=(parents_302==1);
lab var fatheronly_bl "BSQ302: has father only";
qui gen orphan_bl=(parents_302==4);
lab var orphan_bl "BSQ302: has neither biological parent";

/*household assets*/
/*housing material and utility indicators: each is 0/1, and a missing source value yields 0*/
qui gen dirtfloor_bl=(floor_mat_303==1);
qui gen roof_iron_bl=(roof_mat_304==3);
qui gen roof_cement_bl=(roof_mat_304==5);
qui gen water_piped_bl=(water_305==1);
qui gen water_unprotected_bl=(water_305==6|water_305==7);
/*local assets holds the seven asset ownership items; it is reused for the SES index further down*/
local assets "assets_306_Radio assets_306_Television assets_306_Telephone assets_306_Refrigerator assets_306_Bicycle 
	assets_306_Motorcycle assets_306_PrivateCar";
/*row mean over the asset items; rowmean averages over the non-missing items only*/
qui egen assets_pct_bl=rowmean(`assets');
/*asset index: predict must directly follow pca because it reads the stored estimation results*/
qui pca `assets';
qui predict assets_pc1_bl;
qui gen cook_wood_bl=(cook_307==1);
qui gen light_electric_bl=(light_308==1|light_308==2);
lab var dirtfloor_bl "BSQ303: floor of home made of earth/mud";
lab var roof_iron_bl "BSQ304: roof of home made of iron";
lab var roof_cement_bl "BSQ304: roof of home made of cement/concrete/clay/tiles";
lab var water_piped_bl "BSQ305: piped water in household";
lab var water_unprotected_bl "BSQ305: unprotected water source in household";
lab var assets_pct_bl "BSQ306: proportion of HH assets owned, of 7";
lab var assets_pc1_bl "BSQ306: asset index (1st principal component)";
lab var cook_wood_bl "BSQ307: household cooking source is wood";
lab var light_electric_bl "BSQ308: household light source is electricity or generator";

/*parental & sibling occupations and education*/
/*occupation indicators per parent; the loop stem fath/moth expands to occup_father_309 / occup_mother_309*/
foreach x in fath moth {;
	qui gen `x'_ag_bl=(occup_`x'er_309==1);
	qui gen `x'_business_bl=(occup_`x'er_309==2);
	qui gen `x'_pro_bl=(occup_`x'er_309==6);
};
/*mother works: any occupation code from 1 to 6*/
qui gen moth_work_bl=(occup_mother_309>=1 & occup_mother_309<=6 & occup_mother_309!=.);
qui gen parent_business_bl=(fath_business_bl==1|moth_business_bl==1);
qui gen parent_pro_bl=(fath_pro_bl==1|moth_pro_bl==1);
/*education indicators per parent: 1 primary, 2 secondary, 3 or 4 tertiary, plus cumulative versions*/
foreach x in fath moth {;
	qui gen `x'_primary_bl=(educ_`x'_310==1);
	qui gen `x'_secondary_bl=(educ_`x'_310==2);
	qui gen `x'_tertiary_bl=(educ_`x'_310==3|educ_`x'_310==4);
	qui gen `x'_primary_ormore_bl=(`x'_primary_bl==1|`x'_secondary_bl==1|`x'_tertiary_bl==1);
	qui gen `x'_secondary_ormore_bl=(`x'_secondary_bl==1|`x'_tertiary_bl==1);
};
qui gen sib_secondary_bl=(educ_sibling_310==2);
qui gen sib_tertiary_bl=(educ_sibling_310==3|educ_sibling_310==4);
qui gen sib_secondary_ormore_bl=(sib_secondary_bl==1|sib_tertiary_bl==1);
foreach x in fath moth {;
	lab var `x'_ag_bl "BSQ309: `x'er works in agriculture";
	lab var `x'_business_bl "BSQ309: `x'er works in business";
	lab var `x'_pro_bl "BSQ309: `x'er is professional/salaried";
	foreach s in primary secondary tertiary {;
		lab var `x'_`s'_bl "BSQ310: `x'er completed `s'";
	};
	lab var `x'_primary_ormore_bl "BSQ310: `x'er completed primary or more";
	lab var `x'_secondary_ormore_bl "BSQ310: `x'er completed secondary or more";
};
lab var moth_work_bl "BSQ309: mother works";
lab var parent_business_bl "BSQ309: at least one parent in business";
lab var parent_pro_bl "BSQ309: at least one parent is professional/salaried";
lab var sib_secondary_bl "BSQ310: sibling or other in HH completed secondary";
/*the tertiary label belongs to sib_tertiary_bl; it was previously applied to sib_secondary_bl,
	overwriting the secondary label and leaving sib_tertiary_bl unlabeled*/
lab var sib_tertiary_bl "BSQ310: sibling or other in HH completed tertiary";
lab var sib_secondary_ormore_bl "BSQ310: sibling or other in HH completed secondary or tertiary";

/*SES index: first principal component of household assets, parents' education, 
	and indicator for parents in business or professional occupation.*/
/*local ses extends local assets (the asset item list defined earlier in this file) with the
	parental education and occupation indicators*/
local ses "`assets' moth_primary_bl moth_secondary_bl moth_tertiary_bl fath_primary_bl 
	fath_secondary_bl fath_tertiary_bl parent_business_bl parent_pro_bl";
/*predict must directly follow pca because it reads the stored estimation results*/
qui pca `ses';
qui predict ses_pc1_bl;	
lab var ses_pc1_bl "BSQ303-310: SES index (1st principal component of assets, parent education, parent occupation)";

/*household businesses*/
* note also bus_info_212bus_details_c__312 with detailed list of business is very interesting;
qui gen HH_business_bl=(business_311==1);
lab var HH_business_bl "BSQ311: household member owns a business";
/*employment indicator is defined only for households with a business, and missing otherwise*/
qui gen HH_business_employs_bl=(bus_info_212bus_details_b__312>1 & bus_info_212bus_details_b__312!=.) if business_311==1;
lab var HH_business_employs_bl "BSQ312: household business has more than 1 paid employee";

* SECTION 4: Academic background;
* need assistance with coding academic option/stream (opt_400, stream_400);
* need to create codes to match teacher (entr_teach_401) to teacher survey;
qui gen repeat_S4_bl=inlist(repeating_402,2,3);
lab var repeat_S4_bl "BSQ402: repeating S4";
/*-99 is recoded to missing in the three numeric items below*/
qui gen reptimes_bl=reptimes_403;
qui replace reptimes_bl=. if reptimes_bl==-99;
lab var reptimes_bl "BSQ403: number of times repeated a class/level since starting school";
qui gen S3_exam_bl=aggr_404 if aggr_404!=-99;
lab var S3_exam_bl "BSQ404: Aggregate score on S3 exam";
/*flag for a missing aggregate score, including scores recoded from -99*/
qui gen S3_exam_miss_bl=(S3_exam_bl==.);
lab var S3_exam_miss_bl "BSQ404: missing value for S3 aggregate exam score";
qui gen S3_math_bl=math_405 if math_405!=-99;
lab var S3_math_bl "BSQ405: Aggregate score on S3 math exam";

* SECTION 5: labor market and entrepreneurial experience;
/*list of jobs/businesses in source_501_other is interesting*/
/*also 502: frequency of earning money*/
qui gen earn_money_bl=(earn_500==1);
lab var earn_money_bl "BSQ500: currently earning money (excluding pocket money)";
/*source_501: 1 counts as business, 2 as employment, 3 as both; only for students who earn money*/
qui gen personal_business_bl=(inlist(source_501,1,3) & earn_money_bl==1);
lab var personal_business_bl "BSQ501: earns money from personal business";
qui gen employed_bl=(inlist(source_501,2,3) & earn_money_bl==1);
lab var employed_bl "BSQ501: earns money from employment";
/*earnings: reported amount for earners (the -99 code left missing), and 0 for a No at item 500*/
qui gen earn_last2mths_bl=amt_503 if earn_money_bl==1 & amt_503!=-99;
qui replace earn_last2mths_bl=0 if earn_500==0; /*earnings=0 if "No" to earning money question*/
lab var earn_last2mths_bl "BSQ503: earnings from business & employment, last 2 months, FRW";
qui gen earn_last2mths_usd_bl=earn_last2mths_bl/`xrate'; /*exchange rate on 1 Mar 2016: http://www.exchangerates.org.uk/USD-RWF-exchange-rate-history-full.html*/
lab var earn_last2mths_usd_bl "BSQ503: earnings from business & employment, last 2 months, USD";

* SECTION 6: business creation;
/*list of businesses in ifyes_ownbusns_600profbus_601_ot is interesting*/
qui gen ownbusiness_bl=(ownbusns_600==1);
/*NOTE(review): the previous condition tested profbus_601==1 twice (x==1|x==1), which is the same
	as testing it once; a second agriculture-related code may have been intended -- confirm
	against questionnaire item 601*/
qui gen ownbusiness_ag_bl=(ifyes_ownbusns_600profbus_601==1 & ownbusns_600==1);
qui gen ownbusiness_nonag_bl=(ownbusiness_ag_bl==0 & ownbusns_600==1);
/*the months-or-days test is parenthesized as a unit: & binds tighter than |, so without the
	extra parentheses the owner restriction applied to the days clause only*/
qui gen ownbusiness_yr_bl=(((ifyes_ownbusns_600months>=12 & ifyes_ownbusns_600months!=.)|
	(ifyes_ownbusns_600days_602>=365 & ifyes_ownbusns_600days_602!=.)) & ownbusns_600==1);
qui gen ownbusiness_grpprj_bl=(ifyes_ownbusns_600grpproj_603==1 & ownbusns_600==1);
/*order makes the multi-response items adjacent so the first-last variable ranges below are valid*/
order ifyes_ownbusns_600with_604_?;
qui egen buspartners_family_bl=anymatch(ifyes_ownbusns_600with_604_1-ifyes_ownbusns_600with_604_4), v(1 2 4);
qui egen buspartners_school_bl=anymatch(ifyes_ownbusns_600with_604_1-ifyes_ownbusns_600with_604_4), v(6);
qui egen buspartners_none_bl=anymatch(ifyes_ownbusns_600with_604_1-ifyes_ownbusns_600with_604_4), v(10);
/*negative amounts are excluded, so they stay missing*/
qui gen startcapital_bl=ifyes_ownbusns_600amt_605 if ifyes_ownbusns_600amt_605>=0 & ownbusns_600==1;
qui gen startcapital_usd_bl=startcapital_bl/`xrate'; /*exchange rate on 1 Mar 2016: http://www.exchangerates.org.uk/USD-RWF-exchange-rate-history-full.html*/
order ifyes_ownbusns_600getmoney_606?;
qui egen howgotcapital_family_bl=anymatch(ifyes_ownbusns_600getmoney_6061-ifyes_ownbusns_600getmoney_6063), v(1 2 4);
qui egen howgotcapital_school_bl=anymatch(ifyes_ownbusns_600getmoney_6061-ifyes_ownbusns_600getmoney_6063), v(6);
qui egen howgotcapital_none_bl=anymatch(ifyes_ownbusns_600getmoney_6061-ifyes_ownbusns_600getmoney_6063), v(10);
qui gen business_earn_bl=(ifyes_ownbusns_600earn_607==1 & ownbusns_600==1);
qui gen business_inc_last2mths_bl=ifyes_ownbusns_600amtbus_608 if ifyes_ownbusns_600amtbus_608>=0 & business_earn_bl==1;
qui gen business_inc_last2mths_usd_bl=business_inc_last2mths_bl/`xrate'; /*exchange rate on 1 Mar 2016: http://www.exchangerates.org.uk/USD-RWF-exchange-rate-history-full.html*/
qui gen business_inc_last2mths_any_bl=(business_inc_last2mths_bl>0 & business_inc_last2mths_bl!=.);
qui gen business_keep_bl=(ifyes_ownbusns_600contbus_609==1 & ownbusns_600==1);
/*these three characteristics are defined for business owners only, and missing for everyone else*/
foreach x in ag yr grpprj {;
	qui replace ownbusiness_`x'_bl=. if ownbusns_600!=1;
};
/*none is switched off whenever a family or school response is also recorded*/
foreach y in buspartners howgotcapital {;
	foreach x in family school {;
		qui replace `y'_none_bl=0 if `y'_`x'_bl==1;
	};
	/* let these be unconditional
	foreach x in family school none {;
		qui replace `y'_`x'_bl=. if ownbusns_600!=1;
	};
	*/
};
/*
foreach x in business_keep {;
	qui replace `x'_bl=. if ownbusns_600!=1;
};
*/
lab var ownbusiness_bl "BSQ600: owns a business back home, started by self";
lab var ownbusiness_ag_bl "BSQ601: own business in agriculture";
lab var ownbusiness_nonag_bl "BSQ601: own non-agricultural business";
lab var ownbusiness_yr_bl "BSQ602: owned business at least 1 year";
lab var ownbusiness_grpprj_bl "BSQ603: started business as group project";
foreach x in family school none {;
	lab var buspartners_`x'_bl "BSQ604: business partners: `x'";
	lab var howgotcapital_`x'_bl "BSQ606: how got starting capital: `x'";
};
lab var startcapital_bl "BSQ605: starting capital for business (FRW)";
lab var startcapital_usd_bl "BSQ605: starting capital for business (USD)";
lab var business_earn_bl "BSQ607: owns a business that earns money";
lab var business_inc_last2mths_bl "BSQ608: income from business in last 2 months (FRW)";
lab var business_inc_last2mths_usd_bl "BSQ608: income from business in last 2 months (USD)";
/*this indicator previously had no variable label*/
lab var business_inc_last2mths_any_bl "BSQ608: any income from business in last 2 months";
lab var business_keep_bl "BSQ609: plans to continue current business";

* SECTION 7: employment at home;
/*activ_706_other gives interesting list of home activities*/
qui gen job_holiday_bl=(paid_700==1);
lab var job_holiday_bl "BSQ700: paid to work by someone else last holiday";
/*job search is defined only for students without a holiday job
	NOTE(review): searching is coded from seekjob_701_1==0 -- confirm that 0 is the code for
	having looked for work*/
qui gen jobsrch_holiday_bl=(seekjob_701_1==0) if job_holiday_bl!=1;
lab var jobsrch_holiday_bl "BSQ701: searching for work last holiday (if not employed)";
/*days worked: holiday job holders only, capped at 60*/
qui gen dayswork_holiday_bl=dayspaid_702 if job_holiday_bl==1;
qui replace dayswork_holiday_bl=60 if dayswork_holiday_bl>60 & dayswork_holiday_bl!=.;
lab var dayswork_holiday_bl "BSQ702: days worked last holiday (top-coded at 60)";
/*daily earnings: holiday job holders only, negative amounts left missing*/
qui gen earnday_holiday_bl=earnday_703 if earnday_703>=0 & job_holiday_bl==1;
lab var earnday_holiday_bl "BSQ703: daily earnings last holiday (FRW)";
qui gen earnday_holiday_usd_bl=earnday_holiday_bl/`xrate'; /*exchange rate on 1 Mar 2016: http://www.exchangerates.org.uk/USD-RWF-exchange-rate-history-full.html*/
lab var earnday_holiday_usd_bl "BSQ703: daily earnings last holiday (USD)";
/*part-time work during term: code 2 is the same job as holidays, code 1 a different job*/
qui gen parttime_schl_bl=inlist(ptimework_704_1,1,2);
lab var parttime_schl_bl "BSQ704: working part-time for pay during school term";
qui gen parttime_schl_same_bl=(ptimework_704_1==2);
lab var parttime_schl_same_bl "BSQ704: working part-time for pay during school term, same job as holidays";
qui gen parttime_schl_diff_bl=(ptimework_704_1==1);
lab var parttime_schl_diff_bl "BSQ704: working part-time for pay during school term, different job than holidays";
qui gen work_nopay_bl=inlist(withoutpay_705,1,2);
lab var work_nopay_bl "BSQ705: working without pay";
qui gen work_nopay_fam_bl=(withoutpay_705==1);
lab var work_nopay_fam_bl "BSQ705: working without pay for family business";
/*order makes the activity items adjacent so the first-last variable range below is valid*/
order activ_706_?;
qui egen chores_bl=anymatch(activ_706_1-activ_706_3), v(1 2 4 5 6 7 9);
lab var chores_bl "BSQ706: does household chores regularly when at home";
qui gen pocketmoney_bl=(pcktmoney_707==1);
lab var pocketmoney_bl "BSQ707: regularly receives pocket money while at home";

* SECTION 8: Entrepreneurship Skills and Personal Finance;
/*savedmon_804_other list of other sources of savings is interesting*/
qui gen borrow_bl=(borrow_800==1);
/*patience items: 1 if the later, larger amount is chosen; missing when the item is missing or coded -55*/
qui gen wait_10k_bl=(moneyoffer_801==2);
qui gen wait_20k_bl=(moneyoffer_802==2);
qui replace wait_10k_bl=. if moneyoffer_801==.|moneyoffer_801==-55;
qui replace wait_20k_bl=. if moneyoffer_802==.|moneyoffer_802==-55;
qui gen compound_interest_bl=(savings_803==1);
/*any savings: savedmon_804 takes one of the listed codes; code 1 is excluded and is treated
	below as having no savings*/
qui egen anysavings_bl=anymatch(savedmon_804), v(-66 2 3 4 5 6 7);
qui gen savings_less5k_bl=(savedamt_805==1 & savedmon_804!=1);
qui gen savings_5kto10k_bl=(savedamt_805==2 & savedmon_804!=1);
qui gen savings_more10k_bl=(savedamt_805==3 & savedmon_804!=1);
/*conditional versions are defined for savers only. The previous filter was savedmon_804==1,
	which is the no-savings code: it restricted these variables to non-savers, where the
	unconditional indicators are 0 by construction.*/
foreach x in less5k 5kto10k more10k {;
	qui gen savings_`x'_cond_bl=savings_`x'_bl if anysavings_bl==1;
};
lab var borrow_bl "BSQ800: borrowed to take advantage of economic opportunity";
lab var wait_10k_bl "BSQ801: Prefer 10k FRW one month from now to 5k FRW today";
lab var wait_20k_bl "BSQ802: Prefer 20k FRW one month from now to 5k FRW today";
lab var compound_interest_bl "BSQ803: answer question about compound interest correctly";
lab var anysavings_bl "BSQ804: has money saved";
lab var savings_less5k_bl "BSQ805: savings less than 5k FRW";
lab var savings_5kto10k_bl "BSQ805: savings 5,000-10,000 FRW";
lab var savings_more10k_bl "BSQ805: savings more than 10k FRW";
lab var savings_less5k_cond_bl "BSQ805: savings less than 5k FRW (conditional on savings)";
lab var savings_5kto10k_cond_bl "BSQ805: savings 5,000-10,000 FRW (conditional on savings)";
lab var savings_more10k_cond_bl "BSQ805: savings more than 10k FRW (conditional on savings)";

* SECTION 9: Youth Skill Development Scale;
/*can just summarize distributions of these as is*/

* SECTION 10: Entrepreneurship Knowledge (Based on the curriculum);
/*see Laura Smith email to Todd 23jul2019 for definition of correct responses*/
/*For each multi-response question, count the selected options (value 1) separately among the
	correct (true) and incorrect (false) options, then divide by the number of options in the
	group. Only values equal to 1 are counted, so a missing response adds nothing.*/
qui egen mrktskl_true_bl=anycount(mrktskl_1000_DevProBrand mrktskl_1000_Advert mrktskl_1000_VerbalProm), v(1);
qui egen mrktskl_false_bl=anycount(mrktskl_1000_KeepingAccount mrktskl_1000_ConducProQAssurance mrktskl_1000_BusinessReg 
	mrktskl_1000_OpenBid mrktskl_1000_DivLabour), v(1);
qui gen mrktskl_true_pct_bl=mrktskl_true_bl/3;
qui gen mrktskl_false_pct_bl=mrktskl_false_bl/5;
qui egen buspln_true_bl=anycount(busplnskl_1001_BusiName busplnskl_1001_MarketgPlan), v(1);
qui egen buspln_false_bl=anycount(busplnskl_1001_ReceiFromSales busplnskl_1001_PandLStatemt busplnskl_1001_Audit), v(1);
qui gen buspln_true_pct_bl=buspln_true_bl/2;
qui gen buspln_false_pct_bl=buspln_false_bl/3;
/*profit example: the correct answer is 30000*/
qui gen profit_calculation_bl=(proftamt_1002==30000);
qui egen incrproft_true_bl=anycount(incrproft_1003_SellMorePaper incrproft_1003_IncreasPrice incrproft_1003_UseCheaperMat), v(1);
qui egen incrproft_false_bl=anycount(incrproft_1003_EmploySone), v(1);
qui gen incrproft_true_pct_bl=incrproft_true_bl/3;
/*a single false option, so the share equals the count*/
qui gen incrproft_false_pct_bl=incrproft_false_bl;
qui gen busgrwth_indic_bl=(notbusgrth_1004==4);
qui gen profit_definition_bl=(knownoproft_1005_CostXmtSell==1);
/*strict version: the correct option is selected and none of the three other options is*/
qui gen profit_def_strict_bl=(knownoproft_1005_CostXmtSell==1 & knownoproft_1005_CostXlsSell==0 
	& knownoproft_1005_SellXls50Xs==0 & knownoproft_1005_Sellmt50Xs==0);
lab var mrktskl_true_bl "BSQ1000: number of true marketing skills identified (of 3)"; 	
lab var mrktskl_false_bl "BSQ1000: number of false marketing skills identified (of 5)"; 
lab var mrktskl_true_pct_bl "BSQ1000: % of true marketing skills identified (of 3)"; 	
lab var mrktskl_false_pct_bl "BSQ1000: % of false marketing skills identified (of 5)"; 
/*label text corrected: colon added after the question number, to match the other labels*/
lab var buspln_true_bl "BSQ1001: number of true business plan elements identified (of 2)"; 	
lab var buspln_false_bl "BSQ1001: number of false business plan elements identified (of 3)"; 
lab var buspln_true_pct_bl "BSQ1001: % of true business plan elements identified (of 2)"; 	
lab var buspln_false_pct_bl "BSQ1001: % of false business plan elements identified (of 3)"; 
lab var profit_calculation_bl "BSQ1002: can calculate profit from example";
lab var incrproft_true_bl "BSQ1003: number of true ways to increase profit identified (of 3)"; 	
lab var incrproft_false_bl "BSQ1003: number of false ways to increase profit identified (of 1)"; 
/*label text corrected: the stray duplicated word true is removed from both share labels*/
lab var incrproft_true_pct_bl "BSQ1003: % of true ways to increase profit identified (of 3)"; 	
lab var incrproft_false_pct_bl "BSQ1003: % of false ways to increase profit identified (of 1)";
lab var busgrwth_indic_bl "BSQ1004: correctly identifies indicators of business growth";
lab var profit_definition_bl "BSQ1005: understands definition of profit";
lab var profit_def_strict_bl "BSQ1005: understands definition of profit, inc. no false responses";

/*knowledge index: unweighted row mean of the six correct-response measures*/
qui egen eknowledge_index_bl=rowmean(mrktskl_true_pct_bl buspln_true_pct_bl profit_calculation_bl incrproft_true_pct_bl profit_definition_bl busgrwth_indic_bl);
lab var eknowledge_index_bl "BSQ1000-1005: mean of entrepreneurship knowledge questions";

* SECTION 11: aspirations;
/*occup_1101_other list of other occupations youth aspire to is interesting*/
/*planned schooling is collapsed to six ordered levels; any schooling_1100 value not listed
	below, including missing, stays at 0*/
qui gen planned_schl_bl=0;
qui replace planned_schl_bl=1 if schooling_1100==3;
qui replace planned_schl_bl=2 if schooling_1100==4;
qui replace planned_schl_bl=3 if schooling_1100==5|schooling_1100==6;
qui replace planned_schl_bl=4 if schooling_1100==7;
qui replace planned_schl_bl=5 if schooling_1100==8;
lab def planned_schl_bl 0 "less than S6 or no answer" 1 "S6" 2 "TVET" 3 "diploma or university (A0/A1)" 4 "master's"
	5 "doctorate";
lab val planned_schl_bl planned_schl_bl;
/*one indicator per level*/
qui gen planned_schl_sec_bl=(planned_schl_bl==1);
qui gen planned_schl_tvet_bl=(planned_schl_bl==2);
qui gen planned_schl_univ_bl=(planned_schl_bl==3);
qui gen planned_schl_mast_bl=(planned_schl_bl==4);
qui gen planned_schl_phd_bl=(planned_schl_bl==5);
/*post-secondary is level 3 and above, so TVET (level 2) is not counted*/
qui gen planned_schl_postsec_bl=(planned_schl_bl>=3 & planned_schl_bl!=.);
qui gen planned_occup_ag_bl=(occup_1101==1);
qui gen planned_occup_business_bl=(occup_1101==2);
qui gen planned_occup_pro_bl=(occup_1101==5);
qui gen planned_occup_busorpro_bl=(occup_1101==2|occup_1101==5);
qui gen planned_business_bl=(busnsftr_1102==2);
lab var planned_schl_bl "BSQ1100: highest level of schooling planned to complete";
lab var planned_schl_sec_bl "BSQ1100: highest level of schooling planned to complete: secondary";
lab var planned_schl_tvet_bl "BSQ1100: highest level of schooling planned to complete: TVET";
lab var planned_schl_univ_bl "BSQ1100: highest level of schooling planned to complete: diploma or university";
lab var planned_schl_mast_bl "BSQ1100: highest level of schooling planned to complete: master's";
lab var planned_schl_phd_bl "BSQ1100: highest level of schooling planned to complete: doctorate";
/*question number corrected from BSQ100 to BSQ1100, the item all other planned_schl labels cite*/
lab var planned_schl_postsec_bl "BSQ1100: plans to attend post-secondary";
lab var planned_occup_ag_bl "BSQ1101: planned occupation: agriculture";
lab var planned_occup_business_bl "BSQ1101: planned occupation: business";
lab var planned_occup_pro_bl "BSQ1101: planned occupation: professional/salaried";
lab var planned_occup_busorpro_bl "BSQ1101: planned occupation: business or professional/salaried";
lab var planned_business_bl "BSQ1102: plans to start business after finishing school";

* SECTION 12: locus of control;
/*Builds the five locus-of-control ratings (BSQ1200/1202/1204/1206/1208),
  indicators for selected stated reasons (BSQ1201/1203/1205) and the row mean
  of the five ratings. Reasons for the head boy/girl and safety ratings are not
  coded here but could be.*/

/*ratings: copy each survey item and set any value below 1 to missing*/
local ctrl_items "univ housework children headboy safe";
local ctrl_srcs "univ_1200 hswork_1202 child_1204 head_1206 safe_1208";
local ctrl_vars "";
forvalues j=1/5 {;
	local item : word `j' of `ctrl_items';
	local src : word `j' of `ctrl_srcs';
	qui gen control_`item'_bl=`src';
	qui replace control_`item'_bl=. if control_`item'_bl<1;
	local ctrl_vars "`ctrl_vars' control_`item'_bl";
};

/*reasons behind the university rating: response codes 1, 3, 5 and 6 of BSQ1201*/
local univ_reasons "lowscore money dontwant family";
local univ_codes "1 3 5 6";
forvalues j=1/4 {;
	local rsn : word `j' of `univ_reasons';
	local code : word `j' of `univ_codes';
	qui gen control_univ_`rsn'_bl=(reas_1201==`code');
};

/*reason behind the housework rating: response code 1 of BSQ1203*/
qui gen control_housework_parents_bl=(reas_1203==1);

/*reasons behind the children rating: response codes 1 to 4 of BSQ1205, in order*/
local code=0;
foreach rsn in god plan spouse couple {;
	local ++code;
	qui gen control_children_`rsn'_bl=(reas_1205==`code');
};

/*average of the five ratings, row mean over the ratings that are not missing*/
qui egen control_avg_bl=rowmean(`ctrl_vars');

label variable control_univ_bl "BSQ1200: locus of control: attend university (1=no control, 10=total control)";
label variable control_housework_bl "BSQ1202: locus of control: amount of housework (1=no control, 10=total control)";
label variable control_children_bl "BSQ1204: locus of control: number of children (1=no control, 10=total control)";
label variable control_headboy_bl "BSQ1206: locus of control: be head boy/girl (1=no control, 10=total control)";
label variable control_safe_bl "BSQ1208: locus of control: safe when walking (1=no control, 10=total control)";
label variable control_avg_bl "BSQ1200/1202/1204/1206/1208: locus of control average: 1=no control, 10=total control";
label variable control_univ_lowscore_bl "BSQ1201: reason for university locus of control: might not have high enough points";
label variable control_univ_money_bl "BSQ1201: reason for university locus of control: might not have enough money";
label variable control_univ_dontwant_bl "BSQ1201: reason for university locus of control: don't want to attend";
label variable control_univ_family_bl "BSQ1201: reason for university locus of control: want to start family instead";
label variable control_housework_parents_bl "BSQ1203: reason for housework locus of control: parents decide";
label variable control_children_god_bl "BSQ1205: reason for children locus of control: God decides";
label variable control_children_plan_bl "BSQ1205: reason for children locus of control: family planning";
label variable control_children_spouse_bl "BSQ1205: reason for children locus of control: spouse decides";
label variable control_children_couple_bl "BSQ1205: reason for children locus of control: we decide as couple";

* SECTION 13: Grit;
/*see Duckworth et al (2007) for original grit scale*/
/*Each of the four items (BSQ1300-1303) is reverse-coded so that 1=lowest grit
  and 5=highest. The recoded items feed a raw mean score and the first
  principal component.*/
local grit_vars "";
local qnum=1299;
foreach stem in ideasproj diffintrest passchang newproj {;
	local ++qnum;
	/*work on a scratch copy so the original survey item is left untouched*/
	qui gen `stem'x=`stem'_`qnum';
	qui recode `stem'x (1=5 "not true")
					(2=4 "not so true")
					(3=3 "somehow true")
					(4=2 "true")
					(5=1 "very true"), gen(`stem'_bl);
	drop `stem'x;
	local grit_vars "`grit_vars' `stem'_bl";
};

/*raw score is the row mean of the recoded items*/
qui egen grit_raw_bl=rowmean(`grit_vars');
/*with a single new variable, predict after pca returns the score on the first component*/
qui pca `grit_vars';
qui predict grit_pc1_bl;

label variable ideasproj_bl "BSQ1300: New ideas and projects sometimes distract me from older projects (1=very true, 5=not true)";
label variable diffintrest_bl "BSQ1301: Difficult to stay interested in project that takes long time (1=very true, 5=not true)";
label variable passchang_bl "BSQ1302: My interests/passions change year to year (1=very true, 5=not true)";
label variable newproj_bl "BSQ1303: I become interested in new projects every few months (1=very true, 5=not true)";
label variable grit_raw_bl "BSQ1300-1303: Grit index, raw score (mean of 4 items, 1=lowest, 5=highest)";
label variable grit_pc1_bl "BSQ1300-1303: Grit, first principal component";

* SECTION 14: Creativity;
* Score responses according to "East African Youth Creativity Scale - FV.pdf";
* merge cleaned versions of questions 1400-1401;
/*Step 1: park the working student data in a temporary file. A lowercase copy of
  KEY is created because the merge below matches on a variable named key.
  cd_outdata, cd_indata and cd_output are programs defined earlier in this file,
  presumably each changes the working directory - confirm against their definitions.*/
cd_outdata;
qui gen key=KEY;
qui save studenttemp, replace;

/*Step 2: load the cleaned answers to questions 1400-1401 and keep only the merge
  key and the four cleaned variables*/
cd_indata;
qui use Section14_Q1400-1401_Cleaned_V12, clear;
local keepvars "proj_desc_1400 specify_1400 idea1_1401 idea2_1401";
keep key `keepvars';

/*Step 3: merge the student data back on. The cleaned file is the master dataset,
  so for matched observations its values of the four variables are the ones kept.
  The force option lets the merge proceed when a variable is string in one file
  and numeric in the other.
  NOTE(review): _merge is dropped without inspection, so observations found in
  only one of the two files stay in the data unnoticed - consider checking it.*/
cd_outdata;
merge 1:1 key using studenttemp, force;
drop _merge;

* get distribution of responses for each question;
* responses will then be assigned scores by URAP students (Paul Doehring & Julian Schwierzy);
/*Everything between log using and log close is written to a plain-text log
  (replaced on every run). For each creativity question the log holds the value
  label definition (labelbook) and the frequency table of responses (tab).*/
cd_output;
log using rwanda_student_baseline_modify2.txt, text replace;

/*1400: business project idea*/
/*proj_desc_1400 is tabulated only where the free-text field specify_1400 is empty*/
labelbook code_1400lab;
tab specify_1400;
tab proj_desc_1400 if specify_1400=="";

/*1401: idea if lost key (two ideas)*/
labelbook code_idea1_1401lab;
tab idea1_1401;

labelbook code_idea2_1401lab;
tab idea2_1401;

/*1402: Picture #1*/
/*NOTE(review): nothing is tabulated for question 1402 - confirm this is intentional*/

/*1403: Picture #2*/
labelbook drawing_1403;
tab drawing_1403;

/*1404: Picture #3*/
labelbook drawing_1404;
tab drawing_1404;

/*1405: Picture #4*/
labelbook drawing_1405;
tab drawing_1405;

log close;

* enter ratings;
/*the auxiliary do-file presumably creates the rater variables ending in _r1 and _r2
  that are used below - confirm against rwanda_student_baseline_modify1_aux1.do*/
cd_do;
run rwanda_student_baseline_modify1_aux1;

/*creativity items that carry a pair of ratings*/
local rated_items "proj_desc_1400 specify_1400 idea1_1401 idea2_1401 drawing_1403 drawing_1404 drawing_1405";

* assess reconciled ratings;
/*Get pairwise correlation and Cohen's kappa*/
/*kappa between .61-.8 considered "substantial agreement" (see Alison Johnston email to Todd on May 1, 2017)*/
foreach item of local rated_items {;
	display "question=`item'";
	qui correlate `item'_r1 `item'_r2;
	display "correlation="r(rho);

	kap `item'_r1 `item'_r2, tab;
};

* get average rating for each creativity measure;
/*row mean of the two ratings for each item*/
foreach item of local rated_items {;
	qui egen `item'_avg_bl=rowmean(`item'_r1 `item'_r2);
	label variable `item'_avg_bl "BSQ `item', creativity score (1-4, 4 highest), 2-rater average";
};

* save data;
/*flag every observation in memory as part of the baseline sample*/
qui gen insample_bl=1;
label variable insample_bl "in baseline sample";

/*shrink storage types, label the dataset and write it to the output data folder*/
qui compress;
label data "Student baseline survey (2016), modified from cleaned data";
cd_outdata;
qui save Student_Survey_cleaned_modifyv2, replace;

/*delete the temporary datasets from the current folder*/
foreach tmp in teachertemp studenttemp linktemp {;
	erase `tmp'.dta;
};

/*show the clock time at the start and at the end of the run*/
local end=`"$S_TIME"';
di "`start'";
di "`end'";
